The dataset considered is the Steam Video Games Dataset. This dataset is a list of user behaviors, with columns: user-id, game-title, behavior-name, value. The behaviors included are ‘purchase’ and ‘play’. The value indicates the degree to which the behavior was performed - in the case of ‘purchase’ the value is always 1, and in the case of ‘play’ the value represents the number of hours the user has played the game.
# Load the raw Steam behaviour log.
# The CSV is read without a header row, so the column names are supplied here.
# read.csv() runs them through make.names() (check.names defaults to TRUE),
# which is why the rest of the script refers to user.id, game.title and
# behavior.name. The fifth column is named "unknown" and dropped right away.
raw_data <- read.csv(
  "steam-200k.csv",
  header = FALSE, # spelled out: `F` is an ordinary variable that can be reassigned
  col.names = c("user-id", "game-title", "behavior-name", "value", "unknown")
) %>%
  as_tibble() %>%
  select(-unknown)

# Preview the first rows of the loaded data.
head(raw_data)
# Count the "play" records of every game and rank the games by that count.
# `n` is the number of play rows for the game; `rnum` is its rank
# (1 = game with the most play rows).
line_data <- raw_data %>%
  filter(behavior.name == "play") %>%
  count(game.title, name = "n") %>%
  arrange(desc(n)) %>%
  mutate(rnum = row_number())
# Interactive line chart of the per-game play-record count (`n`) against the
# popularity rank (`rnum`), with a range slider on the x axis.
fig <- plot_ly(line_data, x = ~rnum) %>%
  add_lines(y = ~n) %>%
  layout(
    title = "Most Popular Games",
    xaxis = list(
      # TODO add buttons
      # TODO add game title with mouse over
      # Draft of the button configuration (not active yet):
      #   rangeselector = list(
      #     buttons = list(
      #       list(count = 3, label = "3 mo", step = 1, stepmode = "backward"),
      #       list(step = "all"),
      #       list(label = "lol")
      #     )
      #   ),
      title = "Games by Popularity",
      rangeslider = list(type = "int")
    ),
    yaxis = list(title = "Num. Of Players")
  )

# Render the figure.
fig
# One row per "play" record, enriched with per-user totals:
#   time       - hours recorded on this row (the original `value`)
#   total_time - sum of `value` over all play rows of the same user
#   perc_time  - this row's share of the user's total_time
play_time <- raw_data %>%
  filter(behavior.name == "play") %>%
  group_by(user.id) %>%
  mutate(
    total_time = sum(value),
    perc_time = value / total_time
  ) %>%
  ungroup() %>%
  select(user.id, game.title, time = value, perc_time, total_time)
# Total hours played per game, ranked from most to least played.
#
# The per-game total must be built from `time` (the hours on each play row).
# `play_time$total_time` is the *user's* total across all of their games,
# repeated on every row of that user, so summing it per game would count a
# user's entire play time once for every game they touched.
#
# Output columns: game.title, total_time (hours on that game), rnum (rank).
games_play_time <- play_time %>%
  group_by(game.title) %>%
  summarize(total_time = sum(time)) %>%
  ungroup() %>%
  arrange(desc(total_time)) %>%
  mutate(rnum = row_number())
# Number of bars to draw: the top (considered_n - 1) games plus one "Others"
# bar that aggregates every game ranked considered_n or lower.
considered_n <- 21

if (nrow(games_play_time) >= considered_n) {
  to_plot <- bind_rows(
    # Games ranked above the cut-off are kept as they are.
    filter(games_play_time, rnum < considered_n),
    # Everything from the cut-off rank downwards collapses into one row that
    # takes the cut-off rank, so later code can still address it by `rnum`.
    games_play_time %>%
      filter(rnum >= considered_n) %>%
      summarize(
        game.title = "Others",
        total_time = sum(total_time),
        rnum = as.integer(considered_n) # row_number() is integer; keep the type
      )
  )
} else {
  # Fewer games than the cut-off: nothing to aggregate, plot them all instead
  # of indexing a row that does not exist.
  to_plot <- games_play_time
}
# TODO add a button to switch from one chart to the other
# TODO add labels on top of the columns
# TODO could we draw a "broken" column to show that it would be much taller,
#      and put the "off-scale" value on top of it?

# Bar chart of total time by rank, including the aggregated row at rank
# `considered_n`.
ggplot(to_plot, aes(rnum, total_time)) +
  geom_col()

# Same chart without the row at rank `considered_n`.
ggplot(filter(to_plot, rnum != considered_n), aes(rnum, total_time)) +
  geom_col()
# Build the per-game shares for a single user's play-time chart.
#
# NOTE(review): `user_play_time` is not defined in this part of the file; this
# block reads its game.title, perc_time and time columns.
# NOTE(review): the running total follows the existing row order of
# `user_play_time` - presumably sorted by descending play time; confirm.

# Cumulative share of play time that is shown game by game when the user has
# more than 10 games; the remainder is folded into "Others".
perc_to_show <- 0.90

to_plot <- user_play_time %>%
  select(game.title, perc_time, time) %>%
  mutate(cum_perc_time = cumsum(perc_time)) %>%
  arrange(cum_perc_time)

if (nrow(user_play_time) > 10) {
  to_plot <- filter(to_plot, cum_perc_time <= perc_to_show)

  # "Others" receives whatever share and hours the kept rows do not cover.
  # sum() is used instead of max(cum_perc_time): when no row passes the
  # threshold (e.g. a single game above 90%), max() of an empty vector is -Inf
  # and the "Others" share would become Inf.
  to_plot <- add_row(
    to_plot,
    game.title = "Others",
    perc_time = 1.0 - sum(to_plot$perc_time),
    time = sum(user_play_time$time) - sum(to_plot$time),
    cum_perc_time = 1.0
  )
}
# If the player spread their time over a great many games, there are still too
# many rows to draw: keep only the 7 games with the largest share and fold the
# rest into a single "Others" row.
if (nrow(to_plot) > 10) {
  too_big_to_plot <- to_plot %>%
    # Drop an "Others" row added by an earlier step before ranking; otherwise
    # it can land in the top 7 and a second "Others" row is appended below.
    filter(game.title != "Others") %>%
    arrange(desc(perc_time)) %>%
    select(-cum_perc_time) %>%
    mutate(nrow = row_number())

  to_plot <- too_big_to_plot %>%
    filter(nrow < 8) %>%
    select(game.title, perc_time, time) %>%
    mutate(cum_perc_time = cumsum(perc_time)) %>%
    arrange(cum_perc_time)

  # "Others" covers the share and the hours not accounted for by the kept games.
  to_plot <- add_row(
    to_plot,
    game.title = "Others",
    perc_time = 1.0 - sum(to_plot$perc_time),
    time = sum(user_play_time$time) - sum(to_plot$time),
    cum_perc_time = 1.0
  )
}
# Prepare the labels for the chart:
#   label    - share as a percentage, then the hours with an "h" suffix on a
#              second line
#   lab.ypos - midpoint of each row's interval in the running total of
#              perc_time, taken with rows in descending game.title order
to_plot <- to_plot %>%
  mutate(
    label = paste(scales::percent(perc_time), paste0(time, "h"), sep = "\n")
  ) %>%
  select(-cum_perc_time) %>%
  arrange(desc(game.title)) %>%
  mutate(lab.ypos = cumsum(perc_time) - perc_time / 2)

# Show the table that feeds the chart.
to_plot
# TODO find a trick to order the legend

# Fill colours, one per row of `to_plot`.
# "Pastel1" only defines 3 to 9 colours: brewer.pal() warns below 3 and cannot
# return more than 9, which makes scale_fill_manual() fail when there are 10
# rows. Ask for at least 3, and interpolate extra colours beyond 9.
n_slices <- nrow(to_plot)
slice_colors <- if (n_slices <= 9) {
  brewer.pal(n = max(n_slices, 3), name = "Pastel1")
} else {
  colorRampPalette(brewer.pal(n = 9, name = "Pastel1"))(n_slices)
}

# Pie chart (a stacked bar in polar coordinates) of the user's play-time shares.
# Labels are pushed outwards for small slices: the larger the share, the
# closer the label sits to the centre.
# NOTE(review): the title assumes `user_play_time` holds a single user;
# unique() keeps the title to one string instead of one per row.
ggplot(to_plot, aes(x = "", y = perc_time, fill = game.title)) +
  geom_bar(width = 1, stat = "identity", color = "white") +
  coord_polar("y") +
  geom_text(
    aes(x = 0.3 + (1 - perc_time * .6), y = lab.ypos, label = label),
    color = "black"
  ) +
  scale_fill_manual(values = slice_colors) +
  labs(
    title = paste("Play Time of User", unique(pull(user_play_time, user.id))),
    fill = "Game Titles"
  ) +
  theme_void()
# Preview the first rows of the raw data.
head(raw_data)